/**
 * @name 第一财经周刊
 * @appid 1844249
 */

import Creeper, { _ } from '../lib/creeper';

// Descriptor of the single field extracted from an article page. The selector
// is an XPath expression matching a <div> whose class attribute contains
// "article-content".
// NOTE(review): `required: true` presumably makes Creeper drop pages where the
// selector yields nothing — confirm against the Creeper documentation.
const contentField = {
  name: 'content',
  alias: '内容',
  selector: '//div[contains(@class,"article-content")]',
  required: true,
};

// Crawler instance for www.cbnweek.com. Scanning begins at the site home page;
// URLs of the form /articles/<word>/<digits> match the content-page pattern.
const app = new Creeper({
  domains: ['www.cbnweek.com'],
  scanUrls: ['https://www.cbnweek.com'],
  contentUrlRegexes: [/https:\/\/www\.cbnweek\.com\/articles\/\w+\/\d+/],
  fields: [contentField],
});

/**
 * Scan-page handler. Extracts article cards from the listing HTML with a
 * regular expression, queues every article that is not older than 50 hours
 * via `site.addUrl`, and saves its listing metadata (title, lead, publish
 * time, cover image) in `_.store` under the article URL.
 *
 * @param {*} page - Not used here; kept because handler arguments are positional.
 * @param {string} content - Raw HTML of the scanned page.
 * @param {*} site - Crawl context; only `site.addUrl(url)` is called on it.
 * @returns {boolean} Always `false`.
 *   NOTE(review): presumably this tells Creeper to skip its own link
 *   discovery for the page — confirm against the Creeper documentation.
 */
app.onProcessScanPage((page, content, site) => {
  const DAY_MS = 24 * 60 * 60 * 1000;

  // Capture groups, in order: image (text inside the first parentheses of the
  // card's opening tag), article URL, title, lead, publish time.
  const re = /<a class="article-item-image[^(]+\(([^)]+)[^_]+[^-]+[^=]+="([^"]+)(?:[^>]+>){3}([^<]+)(?:[^>]+>){3}([^<]+)(?:[^>]+>){4}([^<]+)/g;
  const year = new Date().getFullYear();

  let iCount = 0;
  let match;

  // Iterate matches directly rather than abusing String#replace for side
  // effects. The pattern starts with a literal, so it can never match the
  // empty string and the loop always advances.
  while ((match = re.exec(content)) !== null) {
    const [, img, url, title, lead, time] = match;

    // The listing shows dates without a year, so the current year is assumed.
    let dt = _.getTime(`${year}年${time}`);

    // Around New Year an article from late December would otherwise be dated
    // almost a year in the future. A publish date more than a day ahead of
    // now cannot be right, so re-parse it as belonging to the previous year.
    if (+dt - Date.now() > DAY_MS) {
      dt = _.getTime(`${year - 1}年${time}`);
    }

    if (_.expired(dt, '50h')) {
      continue;
    }

    iCount++;

    site.addUrl(url);
    _.store.set(url, {
      title,
      lead,
      // First 10 digits of the numeric timestamp.
      // NOTE(review): presumably converts milliseconds to seconds — confirm
      // what `_.getTime` returns.
      time: String(+dt).slice(0, 10),
      image: _.fixImg(img, false),
    });
  }

  console.log(`发现 ${iCount} 个符合要求的文章，准备爬取...`);

  return false;
});

// Start the crawler now that its options and the scan-page handler above have
// been set up.
app.start();
